1. IMPORT ALL LIBRARIES

InΒ [Β ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")

2. LOAD THE DATASET

InΒ [Β ]:
# Surface-water workbook for the Gilgel Gibe gauging station.
DATASET_PATH = './Surface_Water_Data_for_Gilgel_Gibe_Gauging_Station_1995_2021.xlsx'
data = pd.read_excel(DATASET_PATH)
data.head()
Out[Β ]:
Year Station Day Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
0 1995 Gilgel Gibe 1.0 3.712 2.456 2.456 2.456 14.302 14.73 35.643 49.872 95.614 31.646 10.647 5.439
1 1995 Gilgel Gibe 2.0 4.548 2.456 2.937 2.004 12.632 18.309 33.339 56.552 95.614 29.436 9.886 5.136
2 1995 Gilgel Gibe 3.0 4.548 2.456 2.937 1.791 12.226 14.302 31.089 66.374 100.556 27.281 10.264 5.136
3 1995 Gilgel Gibe 4.0 4.263 2.456 3.447 1.586 16.041 16.487 29.983 93.175 111.551 26.225 11.427 4.839
4 1995 Gilgel Gibe 5.0 4.263 2.456 2.693 1.586 11.427 19.245 32.207 99.726 122.93 25.183 11.035 4.548
InΒ [Β ]:
data.Station.value_counts()
Out[Β ]:
Station
Gilgel Gibe                        806
Gilgel Gibe                         31
Mean                                27
Flow million cubic meters (MCM)     27
Maximum                             27
Minimum                             27
Runoff (mm)                         27
Flow (cumecs)                        1
Name: count, dtype: int64
InΒ [Β ]:
# Labels in the 'Station' column that identify the per-year summary rows
# to keep; every other row is discarded.
stations_to_keep = ["Mean", "Flow million cubic meters (MCM)", "Maximum", "Minimum", "Runoff (mm)"]

# Filter the dataframe.  The explicit copy makes filtered_df an independent
# frame, so later edits to it never act on a slice of `data`.
filtered_df = data[data["Station"].isin(stations_to_keep)].copy()
filtered_df.tail()
Out[Β ]:
Year Station Day Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
968 2021 Mean NaN 11.428 3.615 2.879 7.517 47.851 33.342 100.918 162.894 102.853 80.198 11.949 5.373
969 2021 Flow million cubic meters (MCM) NaN 5.260 8.746 7.710 19.483 128.163 86.422 270.298 161.426 103.858 214.803 30.973 14.392
970 2021 Maximum NaN 3.106 8.051 6.054 21.527 65.531 71.922 158.868 162.16 103.355 167.661 17.875 7.063
971 2021 Minimum NaN 3.853 2.363 1.952 2.169 21.630 15.402 67.653 161.793 103.606 18.925 7.301 4.240
972 2021 Runoff (mm) NaN 3.479 2.949 2.599 6.569 43.211 29.138 91.132 161.976 103.229 72.422 10.443 4.852
InΒ [Β ]:
# After filtering, 'Station' holds the summary label, so call it 'Parameter';
# 'Day' is dropped.  Rebinding the name (instead of inplace=True) avoids
# mutating a frame that was obtained by filtering `data`.
filtered_df = (
    filtered_df
    .rename(columns={'Station': 'Parameter'})
    .drop(columns=['Day'])
)
filtered_df.head()
Out[Β ]:
Year Parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
31 1995 Mean 3.261 3.074 2.937 8.122 12.226 15.264 51.185 93.986 79.09 10.647 8.008 13.043
32 1995 Flow million cubic meters (MCM) 8.735 7.436 2.539 21.051 11.863 39.564 45.452 65.424 205 18.852 20.758 6.241
33 1995 Maximum 4.548 4.548 6.800 22.146 31.774 35.062 121.738 175.231 144.908 50.494 11.427 16.716
34 1995 Minimum 2.226 2.456 4.548 1.586 16.041 8.07 75.965 99.726 33.91 31.646 5.749 13.878
35 1995 Runoff (mm) 2.945 2.507 1.389 7.098 6.384 13.339 29.983 49.872 69.117 10.647 6.999 4.548

3. TRANSFORM THE DATASET

InΒ [Β ]:
# Strip leading/trailing whitespace from the column labels so the month
# columns can be referenced by their exact names (e.g. 'Jan') later on.
filtered_df.columns = filtered_df.columns.str.strip()
filtered_df.head()
Out[Β ]:
Year Parameter Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
31 1995 Mean 3.261 3.074 2.937 8.122 12.226 15.264 51.185 93.986 79.09 10.647 8.008 13.043
32 1995 Flow million cubic meters (MCM) 8.735 7.436 2.539 21.051 11.863 39.564 45.452 65.424 205 18.852 20.758 6.241
33 1995 Maximum 4.548 4.548 6.800 22.146 31.774 35.062 121.738 175.231 144.908 50.494 11.427 16.716
34 1995 Minimum 2.226 2.456 4.548 1.586 16.041 8.07 75.965 99.726 33.91 31.646 5.749 13.878
35 1995 Runoff (mm) 2.945 2.507 1.389 7.098 6.384 13.339 29.983 49.872 69.117 10.647 6.999 4.548
InΒ [Β ]:
filtered_df.columns
Out[Β ]:
Index(['Year', 'Parameter', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
       'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
      dtype='object')
InΒ [Β ]:
def transform_data(df):
    """Reshape the wide year-by-month table into a long, dated table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Parameter' and 'Year' columns plus one column per
        month abbreviation ('Jan' ... 'Dec').

    Returns
    -------
    pandas.DataFrame
        Independent frame with columns 'Parameter', 'Date' (first day of
        the month) and 'Value', in melt order (all 'Jan' rows first).
    """
    # Month abbreviation -> zero-padded month number; the keys double as the
    # list of columns to unpivot, so the two can never drift apart.
    month_mapping = {
        "Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05", "Jun": "06",
        "Jul": "07", "Aug": "08", "Sep": "09", "Oct": "10", "Nov": "11", "Dec": "12",
    }

    melted_df = df.melt(
        id_vars=["Parameter", "Year"],
        value_vars=list(month_mapping),
        var_name="Month",
        value_name="Value",
    )
    melted_df["Month"] = melted_df["Month"].map(month_mapping)

    # Combine Year and Month into a Date column (always the 1st of the month).
    melted_df["Date"] = pd.to_datetime(
        melted_df["Year"].astype(str) + "-" + melted_df["Month"] + "-01"
    )

    # Return a copy: callers assign into the result (e.g. casting 'Value'),
    # which must not be a write into a slice of the melted frame.
    return melted_df[["Parameter", "Date", "Value"]].copy()
InΒ [Β ]:
# Transform the data
transformed_data = transform_data(filtered_df)
InΒ [Β ]:
transformed_data.head()
Out[Β ]:
Parameter Date Value
0 Mean 1995-01-01 3.261
1 Flow million cubic meters (MCM) 1995-01-01 8.735
2 Maximum 1995-01-01 4.548
3 Minimum 1995-01-01 2.226
4 Runoff (mm) 1995-01-01 2.945
InΒ [Β ]:
# UNIQUE VALUES ONLY
unique_parameters = transformed_data['Parameter'].unique()
InΒ [Β ]:
# Print every entry of 'Value' that is not an int/float instance.
non_numeric = (
    entry for entry in transformed_data['Value']
    if not isinstance(entry, (int, float))
)
for entry in non_numeric:
    print(entry)
InΒ [Β ]:
transformed_data.Value.isnull().sum()
Out[Β ]:
0
InΒ [Β ]:
# Convert 'Value' column to float.  assign() returns a new frame, so this
# never writes into a frame that may be a slice of another one.
transformed_data = transformed_data.assign(Value=transformed_data['Value'].astype(float))
InΒ [Β ]:
# Rename the parameters.
# Keys are the original labels taken from the 'Station' column; spelling
# them out avoids depending on the order in which unique() returns them.
rename_dict = {
    'Mean': 'Mean Flow',
    'Flow million cubic meters (MCM)': 'FLow',
    'Maximum': 'Max FLow',
    'Minimum': 'Min Flow',
    'Runoff (mm)': 'Runoff',
}

# Renaming the parameters (assign() builds a new frame rather than writing
# into the existing one).
transformed_data = transformed_data.assign(
    Parameter=transformed_data['Parameter'].replace(rename_dict)
)
transformed_data.head()
Out[Β ]:
Parameter Date Value
0 Mean Flow 1995-01-01 3.261
1 FLow 1995-01-01 8.735
2 Max FLow 1995-01-01 4.548
3 Min Flow 1995-01-01 2.226
4 Runoff 1995-01-01 2.945
InΒ [Β ]:
# Parameter counts
transformed_data.Parameter.value_counts()
Out[Β ]:
Parameter
Mean Flow    324
FLow         324
Max FLow     324
Min Flow     324
Runoff       324
Name: count, dtype: int64

4. PLOT VALUES OF EACH PARAMETER

InΒ [Β ]:
# Distinct series names, in order of first appearance in the long table.
parameters = transformed_data['Parameter'].unique()

# Fixed colour for each of the five parameters.
palette = ('#2ca02c', '#8b0000', '#ff6347', '#8c564b', '#ff7f0e')
custom_colors = {parameters[pos]: shade for pos, shade in enumerate(palette)}

plt.figure(figsize=(40, 10))

# Draw one marked line per parameter on the shared axes.
for parameter in parameters:
    is_current = transformed_data['Parameter'] == parameter
    parameter_data = transformed_data[is_current].set_index('Date')
    sns.lineplot(
        x=parameter_data.index,
        y='Value',
        data=parameter_data,
        color=custom_colors[parameter],
        label=parameter,
        marker='o',
        linewidth=1,
    )

# plt.title('Monthly Analysis', fontsize=24)
plt.xlabel('Date', fontsize=24)
plt.ylabel('Streamflow(m3/s)', fontsize=26)
plt.legend(loc='best', fontsize=24, framealpha=0.5)
plt.xticks(fontsize=24)
plt.yticks(fontsize=24)
plt.show()
No description has been provided for this image
InΒ [Β ]:
parameters = transformed_data['Parameter'].unique()
parameters
Out[Β ]:
array(['Mean Flow', 'FLow', 'Max FLow', 'Min Flow', 'Runoff'],
      dtype=object)

5. FIND THE CORRELATIONS BETWEEN THE PARAMETERS

InΒ [Β ]:
# Wide layout: one row per date, one column per parameter.
pivot_data = transformed_data.pivot(
    index='Date',
    columns='Parameter',
    values='Value',
)
# Pairwise correlation between the parameter columns.
correlation_matrix = pivot_data.corr()

plt.figure(figsize=(16, 10))
heatmap_options = dict(
    annot=True,
    cmap='coolwarm',
    square=True,
    linewidths=0.5,
    annot_kws={"size": 18},
)
sns.heatmap(correlation_matrix, **heatmap_options)

# Vertical x tick labels, horizontal y tick labels; axis titles blanked.
plt.xticks(rotation=90, fontsize=20)
plt.yticks(rotation=0, fontsize=20)
plt.xlabel('')
plt.ylabel('')

plt.show()
No description has been provided for this image

6. COMMON FUNCTIONS

TIME SERIES TO SUPERVISED

InΒ [Β ]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted copies.

    Parameters
    ----------
    data : list, array or DataFrame
        Observations ordered in time; 1-D input is treated as one variable.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) placed first in each row.
    n_out : int
        Number of steps from t onwards (t ... t+n_out-1) placed after the lags.
    dropnan : bool
        Drop the rows made incomplete by shifting.

    Returns
    -------
    numpy.ndarray
        One row per time step; columns are the lag blocks (oldest first)
        followed by the current/lead blocks, each block holding every
        variable of `data` in its original order.
    """
    frame = pd.DataFrame(data)
    # Oldest lag first, then the current step and any lead steps.
    shifted = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    shifted += [frame.shift(-lead) for lead in range(n_out)]
    combined = pd.concat(shifted, axis=1)
    if dropnan:
        combined = combined.dropna()
    return combined.values

ADD ROLLING FEATURES

InΒ [Β ]:
def add_rolling_features(data, window=3):
    """Append rolling mean and standard deviation of the first column.

    Parameters
    ----------
    data : array-like or DataFrame
        Input observations; only the first column feeds the rolling stats.
    window : int
        Number of consecutive rows in each rolling window.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with extra 'rolling_mean' and 'rolling_std'
        columns; rows containing NaN (including the first window-1 rows,
        whose window is incomplete) are dropped.
    """
    # Copy first: wrapping an existing DataFrame does not necessarily
    # duplicate its data, and the caller's frame must stay untouched.
    frame = pd.DataFrame(data).copy()
    rolling = frame.iloc[:, 0].rolling(window=window)
    frame['rolling_mean'] = rolling.mean()
    frame['rolling_std'] = rolling.std()
    return frame.dropna()

TRAIN TEST SPLIT

InΒ [Β ]:
def train_test_split(data, train_size=0.9):
    """Split a sliceable sequence into a leading and a trailing part.

    The first ``int(len(data) * train_size)`` items form the training part
    and the remainder the test part; order is preserved (no shuffling).
    """
    cut = int(len(data) * train_size)
    leading = data[:cut]
    trailing = data[cut:]
    return leading, trailing

EVALUATION METRICS

InΒ [Β ]:
def nash_sutcliffe_efficiency(y_true, y_pred):
    """Return 1 - SS_residual / SS_about_the_observed_mean.

    Both inputs are converted to float arrays and compared position by
    position, so plain lists work and pandas index labels are ignored.
    """
    observed = np.asarray(y_true, dtype=float)
    simulated = np.asarray(y_pred, dtype=float)
    residual_ss = np.sum((observed - simulated) ** 2)
    spread_ss = np.sum((observed - observed.mean()) ** 2)
    return 1 - residual_ss / spread_ss
def willmotts_index(y_true, y_pred):
    """Return 1 - sum((p - o)^2) / sum((|p - mean(o)| + |o - mean(o)|)^2).

    Here ``o`` is `y_true` and ``p`` is `y_pred`.  Both inputs are converted
    to float arrays and compared position by position, so plain lists work
    and pandas index labels are ignored.
    """
    observed = np.asarray(y_true, dtype=float)
    simulated = np.asarray(y_pred, dtype=float)
    centre = observed.mean()
    squared_error = np.sum((simulated - observed) ** 2)
    potential_error = np.sum((np.abs(simulated - centre) + np.abs(observed - centre)) ** 2)
    return 1 - squared_error / potential_error

WALK-FORWARD VALIDATION AND MODEL EVALUATION

InΒ [Β ]:
def walk_forward_validation(data, model, verbose=True):
    """Evaluate a one-step forecaster by walking forward over the test rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Supervised table; the last column is the target, the others inputs.
    model : callable
        Called as ``model(history, testX)``, where `history` is a list of
        full rows (inputs + target) and `testX` the inputs of the row to
        predict; must return a single prediction.
    verbose : bool
        Print expected vs. predicted value for every test row.

    Returns
    -------
    tuple
        ``(mae, rmse, r2, nse, willmott, test_index, y_true, predictions)``.
    """
    train, test = train_test_split(data)
    history = list(train.values)
    actual = test.iloc[:, -1]
    predictions = []
    for row in test.values:
        testX, testy = row[:-1], row[-1]
        yhat = model(history, testX)
        predictions.append(yhat)
        # The true row only becomes available to the model after it has
        # been predicted.
        history.append(row)
        if verbose:
            print('>expected=%.1f, predicted=%.1f' % (testy, yhat))
    mae = mean_absolute_error(actual, predictions)
    rmse = np.sqrt(np.mean((actual - np.asarray(predictions))**2))
    r2 = r2_score(actual, predictions)
    nse = nash_sutcliffe_efficiency(actual, predictions)
    willmott = willmotts_index(actual, predictions)
    return mae, rmse, r2, nse, willmott, test.index, actual, predictions

7. MODELS

1. RANDOM FOREST

InΒ [Β ]:
def random_forest_forecast(train, testX, n_estimators=50, random_state=42):
    """Fit a random forest on `train` and predict one step for `testX`.

    Parameters
    ----------
    train : sequence of rows
        Each row holds the input features followed by the target (last).
    testX : 1-D sequence
        Input features of the step to predict.
    n_estimators : int
        Number of trees in the forest.
    random_state : int or None
        Seed for the forest.  Fixed by default so that repeated calls on the
        same data give the same forecast; pass None for unseeded behaviour.

    Returns
    -------
    float
        The single predicted target value.
    """
    train = np.asarray(train)
    trainX, trainy = train[:, :-1], train[:, -1]
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(trainX, trainy)
    # predict() expects a 2-D batch, hence the one-row list.
    yhat = model.predict([testX])
    return yhat[0]

2. SVM

InΒ [Β ]:
from sklearn.svm import SVR
def svm_forecast(train, testX):
    """Fit an RBF-kernel SVR on `train` and predict one step for `testX`.

    Each row of `train` is the input features followed by the target in the
    last position; the return value is the single prediction for `testX`.
    """
    history = np.asarray(train)
    features = history[:, :-1]
    target = history[:, -1]
    regressor = SVR(kernel='rbf').fit(features, target)
    # predict() expects a 2-D batch, hence the one-row list.
    return regressor.predict([testX])[0]

3. XGBRegressor

InΒ [Β ]:
from xgboost import XGBRegressor
def xgboost_forecast(train, testX):
    """Fit a 50-tree XGBRegressor on `train` and predict one step for `testX`.

    Each row of `train` is the input features followed by the target in the
    last position; the return value is the single prediction for `testX`.
    """
    history = np.asarray(train)
    features = history[:, :-1]
    target = history[:, -1]
    regressor = XGBRegressor(n_estimators=50)
    regressor.fit(features, target)
    # predict() expects a 2-D batch, hence the one-row list.
    return regressor.predict([testX])[0]

4. LGBMRegressor

InΒ [Β ]:
from lightgbm import LGBMRegressor
def lightgbm_forecast(train, testX):
    """Fit a 50-tree LGBMRegressor on `train` and predict one step for `testX`.

    Each row of `train` is the input features followed by the target in the
    last position; the return value is the single prediction for `testX`.
    """
    history = np.asarray(train)
    features = history[:, :-1]
    target = history[:, -1]
    # verbose=-1 is passed through to LightGBM unchanged.
    regressor = LGBMRegressor(n_estimators=50, verbose=-1)
    regressor.fit(features, target)
    # predict() expects a 2-D batch, hence the one-row list.
    return regressor.predict([testX])[0]
InΒ [Β ]:
# Line colour used for each model's prediction curve, keyed by model name.
colors_dict = dict(zip(
    ('LightGBM', 'XGBoost', 'SVM', 'Random Forest'),
    ('#27ad81', '#5dc863', '#aadc32', '#fde725'),
))

8. MEAN FLOW ANALYSIS

InΒ [Β ]:
# Keep only the rows of the first parameter in `parameters`.
is_mean_flow = transformed_data['Parameter'].eq(parameters[0])
mean_flow = transformed_data.loc[is_mean_flow]
mean_flow.head()
Out[Β ]:
Parameter Date Value
0 Mean Flow 1995-01-01 3.261
5 Mean Flow 1996-01-01 9.181
10 Mean Flow 1997-01-01 17.626
15 Mean Flow 1998-01-01 74.108
20 Mean Flow 1999-01-01 26.971
InΒ [Β ]:
# The label is constant after filtering, so the 'Parameter' column is dropped.
mean_flow = mean_flow.drop('Parameter', axis=1)
InΒ [Β ]:
mean_flow.head()
Out[Β ]:
Date Value
0 1995-01-01 3.261
5 1996-01-01 9.181
10 1997-01-01 17.626
15 1998-01-01 74.108
20 1999-01-01 26.971
InΒ [Β ]:
# Put the observations in chronological order.
mean_flow = mean_flow.sort_values('Date')
mean_flow.head()
Out[Β ]:
Date Value
0 1995-01-01 3.261
135 1995-02-01 3.074
270 1995-03-01 2.937
405 1995-04-01 8.122
540 1995-05-01 12.226
InΒ [Β ]:
mean_flow.head()
Out[Β ]:
Date Value
0 1995-01-01 3.261
135 1995-02-01 3.074
270 1995-03-01 2.937
405 1995-04-01 8.122
540 1995-05-01 12.226
InΒ [Β ]:
# Line plot of the mean-flow series on a single wide axes.
plt.figure(figsize=(20, 8))
axes = plt.gca()
mean_flow.plot(x='Date', y='Value', kind='line', ax=axes, color='#2ca02c', fontsize=12)

label_font = {'fontsize': 20}
plt.xlabel('Date', fontdict=label_font)
plt.ylabel('Mean Flow (m3/s)', fontdict=label_font)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(size=20)

plt.grid(linestyle='--')
plt.tight_layout()
# Single series: the legend adds nothing, so it is removed.
plt.legend().remove()
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Run `adfuller` on `values` and print its first four results.

    Prints the labelled results followed by a one-line verdict based on
    whether the p-value (second result) is at most 0.05.  Returns None.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    for label, value in zip(labels, result):
        print(label, ':', value)
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(mean_flow['Value'])
ADF Test Statistic : -3.547283596750242
p-value : 0.006852616092827493
#Lags Used : 13
Number of Observations Used : 310
Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the value column and index it by date.
mean_flow = mean_flow[['Date', 'Value']].set_index('Date')
mean_flow.head()
Out[Β ]:
Value
Date
1995-01-01 3.261
1995-02-01 3.074
1995-03-01 2.937
1995-04-01 8.122
1995-05-01 12.226
InΒ [Β ]:
# Six independent accumulators, filled with one entry per evaluated model.
(
    model_names,
    mae_values,
    rmse_values,
    r2_values,
    nse_values,
    willmott_values,
) = ([] for _ in range(6))

TRAIN THE MODELS

InΒ [Β ]:
# Build the supervised-learning table for the mean-flow series.
N_LAGS = 6             # past steps used as model inputs
ROLL_WINDOW = 3        # window of the rolling mean/std features
TRAIN_FRACTION = 0.9   # must equal the train_size default of train_test_split

series = mean_flow
# Columns: [flow, rolling_mean, rolling_std]; the first ROLL_WINDOW-1 rows drop out.
features = add_rolling_features(series.values, window=ROLL_WINDOW).values
n_features = features.shape[1]

# Supervised row i is built from feature rows i .. i+N_LAGS, so the training
# split covers the first N_LAGS + n_train feature rows.  Fit the scaler on
# those rows only, keeping test-period statistics out of the scaling.
n_train = int((len(features) - N_LAGS) * TRAIN_FRACTION)
scaler = StandardScaler().fit(features[:N_LAGS + n_train])
values = scaler.transform(features)

supervised = series_to_supervised(values, n_in=N_LAGS)
# Inputs: the N_LAGS lagged feature blocks.  Target: the current-step flow,
# i.e. the first column of the current block.  (Taking the last column of
# `supervised` as target would predict the current rolling_std and would
# leave the current flow value among the inputs.)
lagged_inputs = supervised[:, :N_LAGS * n_features]
current_flow = supervised[:, N_LAGS * n_features]
data = np.column_stack([lagged_inputs, current_flow])
data_df = pd.DataFrame(data, index=series.index[N_LAGS + ROLL_WINDOW - 1:])
InΒ [Β ]:
# Model names paired with their one-step forecasting functions
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Clear existing lists in case they contain previous values
for collected in (model_names, mae_values, rmse_values, r2_values, nse_values, willmott_values):
    collected.clear()

# Run walk-forward validation once per model.  The forecasts are kept so the
# plot below shows exactly the predictions the metrics were computed from,
# instead of re-fitting every model a second time.
predictions_by_model = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)
    predictions_by_model[model_name] = yhat

    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics_df = pd.DataFrame(metrics)

# Plot the actual vs predicted values for each model
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

# Plot the stored predictions of each model
for model_name, yhat in predictions_by_model.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Mean Flow(m3/s)', fontsize=18)
plt.legend(fontsize=14, loc='upper center', bbox_to_anchor=(0.6, 1.15), ncol=1)
plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)

plt.grid(axis='y', linestyle='--')

ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()
>expected=0.3, predicted=0.1
>expected=2.7, predicted=1.6
>expected=2.4, predicted=1.4
>expected=0.5, predicted=1.7
>expected=1.9, predicted=1.1
>expected=1.3, predicted=2.1
>expected=0.2, predicted=0.7
>expected=-0.6, predicted=-0.6
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.4
>expected=0.9, predicted=0.8
>expected=3.2, predicted=1.6
>expected=2.0, predicted=1.6
>expected=-0.4, predicted=1.8
>expected=1.4, predicted=0.9
>expected=2.9, predicted=1.8
>expected=1.5, predicted=1.4
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=-0.2
>expected=-0.2, predicted=0.0
>expected=0.4, predicted=0.2
>expected=1.7, predicted=1.6
>expected=0.4, predicted=0.3
>expected=0.7, predicted=1.5
>expected=0.9, predicted=1.0
>expected=0.7, predicted=0.6
>expected=0.3, predicted=0.1
>expected=2.7, predicted=2.7
>expected=2.4, predicted=2.0
>expected=0.5, predicted=0.4
>expected=1.9, predicted=1.1
>expected=1.3, predicted=1.9
>expected=0.2, predicted=0.5
>expected=-0.6, predicted=-0.7
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.4
>expected=0.9, predicted=0.7
>expected=3.2, predicted=2.3
>expected=2.0, predicted=1.6
>expected=-0.4, predicted=2.5
>expected=1.4, predicted=1.1
>expected=2.9, predicted=1.4
>expected=1.5, predicted=1.4
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=0.0
>expected=-0.2, predicted=-0.1
>expected=0.4, predicted=0.3
>expected=1.7, predicted=1.7
>expected=0.4, predicted=0.1
>expected=0.7, predicted=1.5
>expected=0.9, predicted=0.8
>expected=0.7, predicted=0.5
>expected=0.3, predicted=0.1
>expected=2.7, predicted=2.4
>expected=2.4, predicted=1.6
>expected=0.5, predicted=0.7
>expected=1.9, predicted=1.3
>expected=1.3, predicted=1.0
>expected=0.2, predicted=0.3
>expected=-0.6, predicted=-0.6
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.3
>expected=0.9, predicted=0.9
>expected=3.2, predicted=2.5
>expected=2.0, predicted=1.3
>expected=-0.4, predicted=0.8
>expected=1.4, predicted=1.0
>expected=2.9, predicted=0.9
>expected=1.5, predicted=0.5
>expected=-0.9, predicted=0.3
>expected=-0.9, predicted=-0.2
>expected=-0.9, predicted=-0.4
>expected=-1.0, predicted=-0.8
>expected=-0.0, predicted=-0.4
>expected=-0.2, predicted=-0.3
>expected=0.4, predicted=0.2
>expected=1.7, predicted=1.2
>expected=0.4, predicted=0.7
>expected=0.7, predicted=1.1
>expected=0.9, predicted=1.1
>expected=0.7, predicted=0.3
>expected=0.3, predicted=0.1
>expected=2.7, predicted=1.9
>expected=2.4, predicted=1.3
>expected=0.5, predicted=1.7
>expected=1.9, predicted=1.3
>expected=1.3, predicted=1.7
>expected=0.2, predicted=0.5
>expected=-0.6, predicted=-0.7
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.4
>expected=0.9, predicted=0.6
>expected=3.2, predicted=2.2
>expected=2.0, predicted=2.2
>expected=-0.4, predicted=1.9
>expected=1.4, predicted=1.5
>expected=2.9, predicted=1.9
>expected=1.5, predicted=1.3
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=-0.1
>expected=-0.2, predicted=0.2
>expected=0.4, predicted=0.3
>expected=1.7, predicted=1.2
>expected=0.4, predicted=0.2
>expected=0.7, predicted=1.5
>expected=0.9, predicted=0.9
>expected=0.7, predicted=0.5
>expected=0.3, predicted=0.1
>expected=2.7, predicted=1.6
>expected=2.4, predicted=1.4
>expected=0.5, predicted=1.7
>expected=1.9, predicted=1.1
>expected=1.3, predicted=2.1
>expected=0.2, predicted=0.7
>expected=-0.6, predicted=-0.6
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.4
>expected=0.9, predicted=0.8
>expected=3.2, predicted=1.6
>expected=2.0, predicted=1.6
>expected=-0.4, predicted=1.8
>expected=1.4, predicted=0.9
>expected=2.9, predicted=1.8
>expected=1.5, predicted=1.4
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=-0.2
>expected=-0.2, predicted=0.0
>expected=0.4, predicted=0.2
>expected=1.7, predicted=1.6
>expected=0.4, predicted=0.3
>expected=0.7, predicted=1.5
>expected=0.9, predicted=1.0
>expected=0.7, predicted=0.6
>expected=0.3, predicted=0.1
>expected=2.7, predicted=2.7
>expected=2.4, predicted=2.0
>expected=0.5, predicted=0.4
>expected=1.9, predicted=1.1
>expected=1.3, predicted=1.9
>expected=0.2, predicted=0.5
>expected=-0.6, predicted=-0.7
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.4
>expected=0.9, predicted=0.7
>expected=3.2, predicted=2.3
>expected=2.0, predicted=1.6
>expected=-0.4, predicted=2.5
>expected=1.4, predicted=1.1
>expected=2.9, predicted=1.4
>expected=1.5, predicted=1.4
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=0.0
>expected=-0.2, predicted=-0.1
>expected=0.4, predicted=0.3
>expected=1.7, predicted=1.7
>expected=0.4, predicted=0.1
>expected=0.7, predicted=1.5
>expected=0.9, predicted=0.8
>expected=0.7, predicted=0.5
>expected=0.3, predicted=0.1
>expected=2.7, predicted=2.4
>expected=2.4, predicted=1.6
>expected=0.5, predicted=0.7
>expected=1.9, predicted=1.3
>expected=1.3, predicted=1.0
>expected=0.2, predicted=0.3
>expected=-0.6, predicted=-0.6
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.3
>expected=0.9, predicted=0.9
>expected=3.2, predicted=2.5
>expected=2.0, predicted=1.3
>expected=-0.4, predicted=0.8
>expected=1.4, predicted=1.0
>expected=2.9, predicted=0.9
>expected=1.5, predicted=0.5
>expected=-0.9, predicted=0.3
>expected=-0.9, predicted=-0.2
>expected=-0.9, predicted=-0.4
>expected=-1.0, predicted=-0.8
>expected=-0.0, predicted=-0.4
>expected=-0.2, predicted=-0.3
>expected=0.4, predicted=0.2
>expected=1.7, predicted=1.2
>expected=0.4, predicted=0.7
>expected=0.7, predicted=1.1
>expected=0.9, predicted=1.1
>expected=0.7, predicted=0.3
>expected=0.3, predicted=0.0
>expected=2.7, predicted=1.9
>expected=2.4, predicted=1.4
>expected=0.5, predicted=1.8
>expected=1.9, predicted=1.3
>expected=1.3, predicted=1.5
>expected=0.2, predicted=0.5
>expected=-0.6, predicted=-0.7
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.3, predicted=-0.3
>expected=0.9, predicted=0.7
>expected=3.2, predicted=2.3
>expected=2.0, predicted=2.1
>expected=-0.4, predicted=1.9
>expected=1.4, predicted=1.3
>expected=2.9, predicted=2.0
>expected=1.5, predicted=1.3
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.0, predicted=-0.1
>expected=-0.2, predicted=0.2
>expected=0.4, predicted=0.3
>expected=1.7, predicted=1.6
>expected=0.4, predicted=0.4
>expected=0.7, predicted=1.5
>expected=0.9, predicted=1.0
>expected=0.7, predicted=0.5
No description has been provided for this image

EVALUATE THE MODELS

InΒ [Β ]:
# One row per model, one column per evaluation metric.
metric_columns = (
    ('Model', model_names),
    ('MAE', mae_values),
    ('RMSE', rmse_values),
    ('R-squared', r2_values),
    ('Nash-Sutcliffe Efficiency', nse_values),
    ("Willmott's Index of Agreement", willmott_values),
)
metrics = pd.DataFrame(dict(metric_columns))
InΒ [Β ]:
metrics
Out[Β ]:
Model MAE RMSE R-squared Nash-Sutcliffe Efficiency Willmott's Index of Agreement
0 LightGBM 0.428523 0.683382 0.695610 0.695610 0.904407
1 XGBoost 0.346418 0.656517 0.719072 0.719072 0.920369
2 SVM 0.429519 0.607130 0.759748 0.759748 0.915041
3 Random Forest 0.382435 0.612480 0.755495 0.755495 0.927202
InΒ [Β ]:
# Render the (rounded) metrics frame as a table figure.
metrics = metrics.round(2)

fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# get_celld() is the public accessor for the {(row, col): cell} mapping.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    # Row 0 holds the column labels; column -1 would hold row labels.
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Mean Flow')
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Bar colours, one per row of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column name -> long title (the title is only used by the
# commented-out set_title call below).
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}

# One separate bar chart per metric, comparing the models.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    model_labels = metrics['Model']
    scores = metrics[metric]
    ax.bar(model_labels, scores, width=bar_width, color=colors)
    # ax.set_title(title, fontsize=16)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    ax.tick_params(axis='y', labelsize=16)
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

9. FLOW ANALYSIS

InΒ [Β ]:
# Get the flow values: keep only the rows of the second parameter in
# `parameters`.
flow = transformed_data[transformed_data['Parameter'] == parameters[1]]
flow.head()
Out[Β ]:
Parameter Date Value
1 FLow 1995-01-01 8.735
6 FLow 1996-01-01 9.706
11 FLow 1997-01-01 9.886
16 FLow 1998-01-01 39.197
21 FLow 1999-01-01 13.043
InΒ [Β ]:
# The label is constant after filtering, so the 'Parameter' column is dropped.
flow = flow.drop('Parameter', axis=1)
InΒ [Β ]:
flow.head()
Out[Β ]:
Date Value
1 1995-01-01 8.735
6 1996-01-01 9.706
11 1997-01-01 9.886
16 1998-01-01 39.197
21 1999-01-01 13.043
InΒ [Β ]:
# Put the observations in chronological order.
flow = flow.sort_values('Date')
flow.head()
Out[Β ]:
Date Value
1 1995-01-01 8.735
136 1995-02-01 7.436
271 1995-03-01 2.539
406 1995-04-01 21.051
541 1995-05-01 11.863
InΒ [Β ]:
flow.head()
Out[Β ]:
Date Value
1 1995-01-01 8.735
136 1995-02-01 7.436
271 1995-03-01 2.539
406 1995-04-01 21.051
541 1995-05-01 11.863
InΒ [Β ]:
# Line plot of the flow series on a single wide axes.
plt.figure(figsize=(20, 8))
axes = plt.gca()
flow.plot(x='Date', y='Value', kind='line', ax=axes, color='#8b0000', fontsize=12)

label_font = {'fontsize': 20}
plt.xlabel('Date', fontdict=label_font)
plt.ylabel('Flow (m3/s)', fontdict=label_font)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(size=20)

plt.grid(linestyle='--')
plt.tight_layout()
# Single series: the legend adds nothing, so it is removed.
plt.legend().remove()
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Augmented Dickey-Fuller test to check the stationarity of the dataset
# (same definition as in the mean-flow section; repeated so this section
# can be run on its own).
def adfuller_test(values):
    """Run `adfuller` on `values` and print its first four results.

    Prints the labelled results followed by a one-line verdict based on
    whether the p-value (second result) is at most 0.05.  Returns None.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    for label, value in zip(labels, result):
        print(label, ':', value)
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: It is non stationary
# H1: It is stationary
InΒ [Β ]:
adfuller_test(flow['Value'])
ADF Test Statistic : -3.9157577063379865
p-value : 0.0019218914853642902
#Lags Used : 11
Number of Observations Used : 312
Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the date and value columns and index the series by date
flow = flow[['Date', 'Value']].set_index('Date')
flow.head()
Out[Β ]:
Value
Date
1995-01-01 8.735
1995-02-01 7.436
1995-03-01 2.539
1995-04-01 21.051
1995-05-01 11.863
InΒ [Β ]:
# Per-model metric containers for the flow analysis.
# NOTE(review): the training cell that follows clears and appends to `model_names`,
# `mae_values`, `rmse_values`, `r2_values`, `nse_values` and `willmott_values`,
# not to these `flow_*` lists — confirm whether these are still needed.
flow_model_names = []
flow_mae_values = []
flow_rmse_values = []
flow_r2_values = []
flow_nse_values = []
flow_willmott_values = []

TRAIN THE MODELS

InΒ [Β ]:
# Build the supervised-learning frame from the date-indexed `flow` series
series = flow
values = series.values
# Add rolling-window features (helper defined elsewhere in the notebook)
values = add_rolling_features(values, window=3)
# Round-trip through a DataFrame to get a plain 2-D array back
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the full series before walk_forward_validation
# sees the data, so statistics of the evaluation period may leak into training — confirm.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# NOTE: `data` is rebound here; it no longer refers to the raw dataset loaded from Excel.
# n_in=6 presumably means six lagged steps are used as inputs — TODO confirm in the helper.
data = series_to_supervised(values, n_in=6)
# The first 6+2 dates are skipped so the index lines up with the remaining rows; presumably
# 6 rows are lost to the lags and 2 to the 3-wide rolling window — TODO confirm.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate forecasters: (display name, forecasting function)
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# The metric lists are reused between analysis sections, so empty them first
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Re-running the validation only for plotting doubles the run time and, for
# learners with random components, can plot forecasts that differ from the
# ones the metrics were computed on.
predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)

    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions[model_name] = yhat

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics_df = pd.DataFrame(metrics)

# Plot the actual values against the cached predictions of every model
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

for model_name, yhat in predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Flow (m3/s)', fontsize=18)

plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)

plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)

plt.grid(axis='y', linestyle='--')

# Hide the top and right frame lines
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()
>expected=0.5, predicted=0.1
>expected=0.5, predicted=0.2
>expected=-0.6, predicted=0.2
>expected=0.8, predicted=0.9
>expected=0.7, predicted=0.2
>expected=1.6, predicted=1.5
>expected=0.3, predicted=0.7
>expected=-0.5, predicted=-0.4
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=0.0
>expected=1.1, predicted=0.9
>expected=0.5, predicted=0.7
>expected=-0.7, predicted=-0.5
>expected=-0.7, predicted=-0.1
>expected=-0.1, predicted=0.5
>expected=1.4, predicted=0.4
>expected=1.8, predicted=1.5
>expected=-0.7, predicted=-0.6
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.2
>expected=-0.1, predicted=0.0
>expected=0.7, predicted=0.5
>expected=0.6, predicted=0.4
>expected=0.4, predicted=0.2
>expected=-0.1, predicted=0.1
>expected=0.6, predicted=0.4
>expected=0.9, predicted=0.7
>expected=0.5, predicted=0.1
>expected=0.5, predicted=0.2
>expected=-0.6, predicted=0.4
>expected=0.8, predicted=1.2
>expected=0.7, predicted=0.1
>expected=1.6, predicted=1.4
>expected=0.3, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.2
>expected=1.1, predicted=1.0
>expected=0.5, predicted=0.5
>expected=-0.7, predicted=-0.2
>expected=-0.7, predicted=-0.2
>expected=-0.1, predicted=-0.2
>expected=1.4, predicted=0.6
>expected=1.8, predicted=1.4
>expected=-0.7, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.0
>expected=-0.1, predicted=-0.1
>expected=0.7, predicted=0.4
>expected=0.6, predicted=0.2
>expected=0.4, predicted=-0.3
>expected=-0.1, predicted=0.1
>expected=0.6, predicted=0.5
>expected=0.9, predicted=0.7
>expected=0.5, predicted=0.2
>expected=0.5, predicted=0.5
>expected=-0.6, predicted=-0.0
>expected=0.8, predicted=0.2
>expected=0.7, predicted=0.8
>expected=1.6, predicted=1.6
>expected=0.3, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-1.0
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.3
>expected=1.1, predicted=1.1
>expected=0.5, predicted=0.6
>expected=-0.7, predicted=-0.0
>expected=-0.7, predicted=0.1
>expected=-0.1, predicted=0.3
>expected=1.4, predicted=0.6
>expected=1.8, predicted=1.0
>expected=-0.7, predicted=-0.5
>expected=-0.9, predicted=-0.8
>expected=-1.0, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.3
>expected=-0.1, predicted=-0.2
>expected=0.7, predicted=0.4
>expected=0.6, predicted=0.1
>expected=0.4, predicted=0.3
>expected=-0.1, predicted=-0.3
>expected=0.6, predicted=-0.0
>expected=0.9, predicted=0.3
>expected=0.5, predicted=0.0
>expected=0.5, predicted=0.3
>expected=-0.6, predicted=0.4
>expected=0.8, predicted=0.8
>expected=0.7, predicted=0.6
>expected=1.6, predicted=1.5
>expected=0.3, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.2
>expected=1.1, predicted=0.7
>expected=0.5, predicted=0.6
>expected=-0.7, predicted=-0.1
>expected=-0.7, predicted=-0.1
>expected=-0.1, predicted=0.2
>expected=1.4, predicted=0.6
>expected=1.8, predicted=1.7
>expected=-0.7, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.1
>expected=-0.1, predicted=0.2
>expected=0.7, predicted=0.6
>expected=0.6, predicted=0.2
>expected=0.4, predicted=0.2
>expected=-0.1, predicted=0.1
>expected=0.6, predicted=0.2
>expected=0.9, predicted=0.7
>expected=0.5, predicted=0.1
>expected=0.5, predicted=0.2
>expected=-0.6, predicted=0.2
>expected=0.8, predicted=0.9
>expected=0.7, predicted=0.2
>expected=1.6, predicted=1.5
>expected=0.3, predicted=0.7
>expected=-0.5, predicted=-0.4
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=0.0
>expected=1.1, predicted=0.9
>expected=0.5, predicted=0.7
>expected=-0.7, predicted=-0.5
>expected=-0.7, predicted=-0.1
>expected=-0.1, predicted=0.5
>expected=1.4, predicted=0.4
>expected=1.8, predicted=1.5
>expected=-0.7, predicted=-0.6
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.2
>expected=-0.1, predicted=0.0
>expected=0.7, predicted=0.5
>expected=0.6, predicted=0.4
>expected=0.4, predicted=0.2
>expected=-0.1, predicted=0.1
>expected=0.6, predicted=0.4
>expected=0.9, predicted=0.7
>expected=0.5, predicted=0.1
>expected=0.5, predicted=0.2
>expected=-0.6, predicted=0.4
>expected=0.8, predicted=1.2
>expected=0.7, predicted=0.1
>expected=1.6, predicted=1.4
>expected=0.3, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.2
>expected=1.1, predicted=1.0
>expected=0.5, predicted=0.5
>expected=-0.7, predicted=-0.2
>expected=-0.7, predicted=-0.2
>expected=-0.1, predicted=-0.2
>expected=1.4, predicted=0.6
>expected=1.8, predicted=1.4
>expected=-0.7, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.0
>expected=-0.1, predicted=-0.1
>expected=0.7, predicted=0.4
>expected=0.6, predicted=0.2
>expected=0.4, predicted=-0.3
>expected=-0.1, predicted=0.1
>expected=0.6, predicted=0.5
>expected=0.9, predicted=0.7
>expected=0.5, predicted=0.2
>expected=0.5, predicted=0.5
>expected=-0.6, predicted=-0.0
>expected=0.8, predicted=0.2
>expected=0.7, predicted=0.8
>expected=1.6, predicted=1.6
>expected=0.3, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-1.0
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.3
>expected=1.1, predicted=1.1
>expected=0.5, predicted=0.6
>expected=-0.7, predicted=-0.0
>expected=-0.7, predicted=0.1
>expected=-0.1, predicted=0.3
>expected=1.4, predicted=0.6
>expected=1.8, predicted=1.0
>expected=-0.7, predicted=-0.5
>expected=-0.9, predicted=-0.8
>expected=-1.0, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.3
>expected=-0.1, predicted=-0.2
>expected=0.7, predicted=0.4
>expected=0.6, predicted=0.1
>expected=0.4, predicted=0.3
>expected=-0.1, predicted=-0.3
>expected=0.6, predicted=-0.0
>expected=0.9, predicted=0.3
>expected=0.5, predicted=0.1
>expected=0.5, predicted=0.4
>expected=-0.6, predicted=0.4
>expected=0.8, predicted=0.8
>expected=0.7, predicted=0.4
>expected=1.6, predicted=1.5
>expected=0.3, predicted=0.5
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=-0.2, predicted=-0.2
>expected=1.1, predicted=0.8
>expected=0.5, predicted=0.6
>expected=-0.7, predicted=-0.2
>expected=-0.7, predicted=0.1
>expected=-0.1, predicted=0.2
>expected=1.4, predicted=0.4
>expected=1.8, predicted=1.7
>expected=-0.7, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-1.0, predicted=-1.0
>expected=0.1, predicted=-0.1
>expected=-0.1, predicted=0.2
>expected=0.7, predicted=0.5
>expected=0.6, predicted=0.4
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=0.2
>expected=0.6, predicted=0.4
>expected=0.9, predicted=0.8
No description has been provided for this image

EVALUATE THE MODEL

InΒ [Β ]:
# Gather the per-model scores collected during training into one table
metrics = pd.DataFrame({
    "Model": model_names,
    "MAE": mae_values,
    "RMSE": rmse_values,
    "R-squared": r2_values,
    "Nash-Sutcliffe Efficiency": nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
# Display the per-model metrics table
metrics
Out[Β ]:
Model MAE RMSE R-squared Nash-Sutcliffe Efficiency Willmott's Index of Agreement
0 LightGBM 0.251519 0.336681 0.823571 0.823571 0.947015
1 XGBoost 0.257676 0.357689 0.800866 0.800866 0.941150
2 SVM 0.287994 0.386812 0.767119 0.767119 0.926692
3 Random Forest 0.251434 0.344577 0.815198 0.815198 0.944750
InΒ [Β ]:
# Round for display; `metrics` is rebound, so later cells read the rounded values
metrics = metrics.round(2)

# Render the metrics as a table on an otherwise empty figure
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# Use the public get_celld() accessor ({(row, col): cell}) instead of the
# private `_cells` attribute. Row 0 holds the column labels and column -1
# would hold row labels; both are styled as headers.
for key, cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if key[0] == 0 or key[1] == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Flow')
plt.show()
No description has been provided for this image
InΒ [Β ]:
# One bar colour per model, applied in the row order of `metrics`
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long-form title (the title is currently not drawn)
metrics_title = {
    "MAE": "Mean Absolute Error (MAE)",
    "RMSE": "Root Mean Squared Error (RMSE)",
    "R-squared": "R-squared",
    "Nash-Sutcliffe Efficiency": "Nash-Sutcliffe Efficiency",
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one standalone bar chart per metric
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(x=metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

8. MAX FLOW ANALYSIS

InΒ [Β ]:
# Select the rows of the long-format table that belong to the third
# entry of `parameters` (shown as "Max FLow" in the preview)
flow_max = transformed_data.loc[transformed_data['Parameter'] == parameters[2]]
flow_max.head()
Out[Β ]:
Parameter Date Value
2 Max FLow 1995-01-01 4.548
7 Max FLow 1996-01-01 9.443
12 Max FLow 1997-01-01 4.839
17 Max FLow 1998-01-01 19.245
22 Max FLow 1999-01-01 7.722
InΒ [Β ]:
# The Parameter column is constant after filtering, so remove it
flow_max = flow_max.drop('Parameter', axis='columns')
InΒ [Β ]:
# Preview the remaining columns after dropping `Parameter`
flow_max.head()
Out[Β ]:
Date Value
2 1995-01-01 4.548
7 1996-01-01 9.443
12 1997-01-01 4.839
17 1998-01-01 19.245
22 1999-01-01 7.722
InΒ [Β ]:
# Put the observations in chronological order
flow_max = flow_max.sort_values(by=['Date'], ascending=True)
flow_max.head()
Out[Β ]:
Date Value
2 1995-01-01 4.548
137 1995-02-01 4.548
272 1995-03-01 6.800
407 1995-04-01 22.146
542 1995-05-01 31.774
InΒ [Β ]:
# Preview the date-sorted series (repeats the preview of the previous cell)
flow_max.head()
Out[Β ]:
Date Value
2 1995-01-01 4.548
137 1995-02-01 4.548
272 1995-03-01 6.800
407 1995-04-01 22.146
542 1995-05-01 31.774
InΒ [Β ]:
# Line chart of the max-flow series over time
plt.figure(figsize=(20, 8))
flow_max.plot.line(x='Date', y='Value', ax=plt.gca(), color='#ff6347', fontsize=12)
plt.xlabel('Date', fontsize=20)
plt.ylabel('Max Flow (m3/s)', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid(linestyle='--')
plt.tight_layout()
# A single series needs no legend
plt.legend().remove()
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Augmented Dickey-Fuller (ADF) test to check the stationarity of a series
def adfuller_test(values):
    """Run `adfuller` on `values` and print the headline results.

    Prints the first four entries of the result under the labels
    "ADF Test Statistic", "p-value", "#Lags Used" and
    "Number of Observations Used", then a verdict based on whether the
    p-value is at most 0.05. Nothing is returned.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops after the four labelled entries; further items in `result` are not printed
    for label, value in zip(labels, result):
        print(label, ':', value)
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series is non-stationary
# H1: the series is stationary
InΒ [Β ]:
# Run the stationarity check on the max-flow values
adfuller_test(flow_max['Value'])
ADF Test Statistic : -3.3348205010962766
p-value : 0.013391956092490444
#Lags Used : 11
Number of Observations Used : 312
Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep only the date and value columns and index the series by date
flow_max = flow_max[['Date', 'Value']].set_index('Date')
flow_max.head()
Out[Β ]:
Value
Date
1995-01-01 4.548
1995-02-01 4.548
1995-03-01 6.800
1995-04-01 22.146
1995-05-01 31.774

TRAIN THE MODEL

InΒ [Β ]:
# Build the supervised-learning frame from the date-indexed `flow_max` series
series = flow_max
values = series.values
# Add rolling-window features (helper defined elsewhere in the notebook)
values = add_rolling_features(values, window=3)
# Round-trip through a DataFrame to get a plain 2-D array back
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the full series before walk_forward_validation
# sees the data, so statistics of the evaluation period may leak into training — confirm.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# n_in=6 presumably means six lagged steps are used as inputs — TODO confirm in the helper.
data = series_to_supervised(values, n_in=6)
# The first 6+2 dates are skipped so the index lines up with the remaining rows; presumably
# 6 rows are lost to the lags and 2 to the 3-wide rolling window — TODO confirm.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate forecasters: (display name, forecasting function)
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# The metric lists are reused between analysis sections, so empty them first
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Re-running the validation only for plotting doubles the run time and, for
# learners with random components, can plot forecasts that differ from the
# ones the metrics were computed on.
predictions = {}
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)

    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions[model_name] = yhat

# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics_df = pd.DataFrame(metrics)

# Plot the actual values against the cached predictions of every model
plt.figure(figsize=(16, 6))
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

for model_name, yhat in predictions.items():
    plt.plot(test_index, yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Max Flow (m3/s)', fontsize=18)

plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)

plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)
plt.grid(axis='y', linestyle='--')
# Hide the top and right frame lines
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()
>expected=0.5, predicted=-0.0
>expected=1.4, predicted=1.1
>expected=0.6, predicted=0.9
>expected=-0.5, predicted=1.1
>expected=-0.2, predicted=1.1
>expected=1.0, predicted=1.8
>expected=0.5, predicted=1.1
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.3
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.0
>expected=1.1, predicted=0.9
>expected=1.1, predicted=1.5
>expected=-0.1, predicted=0.7
>expected=-0.6, predicted=0.8
>expected=-0.6, predicted=1.6
>expected=1.9, predicted=1.9
>expected=1.9, predicted=1.8
>expected=-0.8, predicted=-0.6
>expected=-0.9, predicted=-1.0
>expected=-1.1, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.4
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=-0.4
>expected=-0.1, predicted=-0.1
>expected=1.1, predicted=0.9
>expected=1.6, predicted=0.8
>expected=0.5, predicted=0.2
>expected=1.4, predicted=0.9
>expected=0.6, predicted=0.8
>expected=-0.5, predicted=0.4
>expected=-0.2, predicted=0.7
>expected=1.0, predicted=1.7
>expected=0.5, predicted=0.7
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.2
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=-0.1
>expected=1.1, predicted=0.7
>expected=1.1, predicted=1.2
>expected=-0.1, predicted=0.3
>expected=-0.6, predicted=0.0
>expected=-0.6, predicted=1.4
>expected=1.9, predicted=2.3
>expected=1.9, predicted=1.7
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.1, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.2
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.2
>expected=-0.1, predicted=0.0
>expected=-0.1, predicted=-0.3
>expected=1.1, predicted=0.5
>expected=1.6, predicted=0.8
>expected=0.5, predicted=0.2
>expected=1.4, predicted=1.4
>expected=0.6, predicted=0.8
>expected=-0.5, predicted=0.2
>expected=-0.2, predicted=0.4
>expected=1.0, predicted=1.1
>expected=0.5, predicted=0.9
>expected=-0.0, predicted=-0.1
>expected=-0.1, predicted=-0.3
>expected=-1.0, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.0
>expected=1.1, predicted=1.2
>expected=1.1, predicted=1.2
>expected=-0.1, predicted=0.3
>expected=-0.6, predicted=0.2
>expected=-0.6, predicted=0.3
>expected=1.9, predicted=1.1
>expected=1.9, predicted=1.0
>expected=-0.8, predicted=-0.4
>expected=-0.9, predicted=-0.6
>expected=-1.1, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.3
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.3
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=0.1
>expected=-0.1, predicted=-0.1
>expected=1.1, predicted=0.6
>expected=1.6, predicted=1.0
>expected=0.5, predicted=-0.1
>expected=1.4, predicted=0.8
>expected=0.6, predicted=0.7
>expected=-0.5, predicted=0.7
>expected=-0.2, predicted=0.6
>expected=1.0, predicted=1.7
>expected=0.5, predicted=0.7
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.4
>expected=-1.0, predicted=-1.0
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.1
>expected=1.1, predicted=0.8
>expected=1.1, predicted=1.1
>expected=-0.1, predicted=0.7
>expected=-0.6, predicted=0.6
>expected=-0.6, predicted=1.7
>expected=1.9, predicted=1.7
>expected=1.9, predicted=1.7
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-1.0
>expected=-1.1, predicted=-1.1
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.3
>expected=-0.3, predicted=0.1
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.3
>expected=-0.1, predicted=-0.2
>expected=-0.1, predicted=-0.1
>expected=1.1, predicted=0.6
>expected=1.6, predicted=0.7
>expected=0.5, predicted=-0.0
>expected=1.4, predicted=1.1
>expected=0.6, predicted=0.9
>expected=-0.5, predicted=1.1
>expected=-0.2, predicted=1.1
>expected=1.0, predicted=1.8
>expected=0.5, predicted=1.1
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.3
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.0
>expected=1.1, predicted=0.9
>expected=1.1, predicted=1.5
>expected=-0.1, predicted=0.7
>expected=-0.6, predicted=0.8
>expected=-0.6, predicted=1.6
>expected=1.9, predicted=1.9
>expected=1.9, predicted=1.8
>expected=-0.8, predicted=-0.6
>expected=-0.9, predicted=-1.0
>expected=-1.1, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.4
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=-0.4
>expected=-0.1, predicted=-0.1
>expected=1.1, predicted=0.9
>expected=1.6, predicted=0.8
>expected=0.5, predicted=0.2
>expected=1.4, predicted=0.9
>expected=0.6, predicted=0.8
>expected=-0.5, predicted=0.4
>expected=-0.2, predicted=0.7
>expected=1.0, predicted=1.7
>expected=0.5, predicted=0.7
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.2
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=-0.1
>expected=1.1, predicted=0.7
>expected=1.1, predicted=1.2
>expected=-0.1, predicted=0.3
>expected=-0.6, predicted=0.0
>expected=-0.6, predicted=1.4
>expected=1.9, predicted=2.3
>expected=1.9, predicted=1.7
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.1, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.2
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.2
>expected=-0.1, predicted=0.0
>expected=-0.1, predicted=-0.3
>expected=1.1, predicted=0.5
>expected=1.6, predicted=0.8
>expected=0.5, predicted=0.2
>expected=1.4, predicted=1.4
>expected=0.6, predicted=0.8
>expected=-0.5, predicted=0.2
>expected=-0.2, predicted=0.4
>expected=1.0, predicted=1.1
>expected=0.5, predicted=0.9
>expected=-0.0, predicted=-0.1
>expected=-0.1, predicted=-0.3
>expected=-1.0, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.0
>expected=1.1, predicted=1.2
>expected=1.1, predicted=1.2
>expected=-0.1, predicted=0.3
>expected=-0.6, predicted=0.2
>expected=-0.6, predicted=0.3
>expected=1.9, predicted=1.1
>expected=1.9, predicted=1.0
>expected=-0.8, predicted=-0.4
>expected=-0.9, predicted=-0.6
>expected=-1.1, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.3
>expected=-0.3, predicted=-0.1
>expected=0.4, predicted=0.3
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=0.1
>expected=-0.1, predicted=-0.1
>expected=1.1, predicted=0.6
>expected=1.6, predicted=1.0
>expected=0.5, predicted=0.0
>expected=1.4, predicted=0.9
>expected=0.6, predicted=0.6
>expected=-0.5, predicted=0.6
>expected=-0.2, predicted=0.8
>expected=1.0, predicted=1.7
>expected=0.5, predicted=0.6
>expected=-0.0, predicted=0.2
>expected=-0.1, predicted=-0.3
>expected=-1.0, predicted=-1.0
>expected=-0.9, predicted=-1.0
>expected=-0.8, predicted=-0.8
>expected=0.1, predicted=0.1
>expected=1.1, predicted=0.7
>expected=1.1, predicted=1.3
>expected=-0.1, predicted=0.7
>expected=-0.6, predicted=0.6
>expected=-0.6, predicted=1.7
>expected=1.9, predicted=1.8
>expected=1.9, predicted=1.6
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.1, predicted=-1.1
>expected=-0.9, predicted=-0.9
>expected=-0.2, predicted=-0.3
>expected=-0.3, predicted=0.0
>expected=0.4, predicted=0.5
>expected=0.4, predicted=0.4
>expected=-0.1, predicted=-0.1
>expected=-0.1, predicted=0.0
>expected=1.1, predicted=0.6
>expected=1.6, predicted=0.6
No description has been provided for this image

EVALUATE THE MODEL

InΒ [Β ]:
# Gather the per-model scores collected during training into one table
metrics = pd.DataFrame({
    "Model": model_names,
    "MAE": mae_values,
    "RMSE": rmse_values,
    "R-squared": r2_values,
    "Nash-Sutcliffe Efficiency": nse_values,
    "Willmott's Index of Agreement": willmott_values,
})
InΒ [Β ]:
# Display the per-model metrics table
metrics
Out[Β ]:
Model MAE RMSE R-squared Nash-Sutcliffe Efficiency Willmott's Index of Agreement
0 LightGBM 0.417202 0.663100 0.421248 0.421248 0.860371
1 XGBoost 0.345963 0.523519 0.639255 0.639255 0.905622
2 SVM 0.312871 0.418174 0.769830 0.769830 0.924929
3 Random Forest 0.372875 0.616156 0.500292 0.500292 0.866312
InΒ [Β ]:
# Round for display; `metrics` is rebound, so later cells read the rounded values
metrics = metrics.round(2)

# Render the metrics as a table on an otherwise empty figure
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# Use the public get_celld() accessor ({(row, col): cell}) instead of the
# private `_cells` attribute. Row 0 holds the column labels and column -1
# would hold row labels; both are styled as headers.
for key, cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if key[0] == 0 or key[1] == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

# Title spelling matches the 'Max Flow (m3/s)' axis label used for this series
plt.title('Models Performance on Max Flow')
plt.show()
No description has been provided for this image
InΒ [Β ]:
# One bar colour per model, applied in the row order of `metrics`
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long-form title (the title is currently not drawn)
metrics_title = {
    "MAE": "Mean Absolute Error (MAE)",
    "RMSE": "Root Mean Squared Error (RMSE)",
    "R-squared": "R-squared",
    "Nash-Sutcliffe Efficiency": "Nash-Sutcliffe Efficiency",
    "Willmott's Index of Agreement": "Willmott's Index of Agreement",
}

# Draw one standalone bar chart per metric
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(x=metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', rotation=30, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

9. MIN FLOW ANALYSIS

InΒ [Β ]:
# Select the rows of the long-format table whose Parameter is 'Min Flow'
flow_min = transformed_data.loc[transformed_data['Parameter'] == 'Min Flow']
InΒ [Β ]:
# The Parameter column is constant after filtering, so remove it
flow_min = flow_min.drop('Parameter', axis='columns')
InΒ [Β ]:
# Preview the remaining columns after dropping `Parameter`
flow_min.head()
Out[Β ]:
Date Value
3 1995-01-01 2.226
8 1996-01-01 9.574
13 1997-01-01 5.943
18 1998-01-01 24.986
23 1999-01-01 9.093
InΒ [Β ]:
# Put the observations in chronological order
flow_min = flow_min.sort_values(by=['Date'], ascending=True)
flow_min.head()
Out[Β ]:
Date Value
3 1995-01-01 2.226
138 1995-02-01 2.456
273 1995-03-01 4.548
408 1995-04-01 1.586
543 1995-05-01 16.041
InΒ [Β ]:
# Preview the date-sorted series (repeats the preview of the previous cell)
flow_min.head()
Out[Β ]:
Date Value
3 1995-01-01 2.226
138 1995-02-01 2.456
273 1995-03-01 4.548
408 1995-04-01 1.586
543 1995-05-01 16.041
InΒ [Β ]:
# Line chart of the min-flow series over time
plt.figure(figsize=(20, 8))
flow_min.plot.line(x='Date', y='Value', ax=plt.gca(), color='#8c564b', fontsize=12)
plt.xlabel('Date', fontsize=20)
plt.ylabel('Min Flow (m3/s)', fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid(linestyle='--')
plt.tight_layout()
# A single series needs no legend
plt.legend().remove()
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Augmented Dickey-Fuller (ADF) test to check the stationarity of a series
def adfuller_test(values):
    """Run `adfuller` on `values` and print the headline results.

    Prints the first four entries of the result under the labels
    "ADF Test Statistic", "p-value", "#Lags Used" and
    "Number of Observations Used", then a verdict based on whether the
    p-value is at most 0.05. Nothing is returned.
    """
    result = adfuller(values)
    labels = ('ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used')
    # zip stops after the four labelled entries; further items in `result` are not printed
    for label, value in zip(labels, result):
        print(label, ':', value)
    p_value = result[1]
    verdict = (
        "Strong evidence against the null hypothesis(H0)"
        if p_value <= 0.05
        else "Weak evidence against the null hypothesis(H0)"
    )
    print(verdict)
InΒ [Β ]:
# H0: the series is non-stationary
# H1: the series is stationary
InΒ [Β ]:
# Run the stationarity check on the min-flow values
adfuller_test(flow_min['Value'])
ADF Test Statistic : -2.990114823998536
p-value : 0.03582792052802385
#Lags Used : 13
Number of Observations Used : 310
Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep just the date and value columns and make the date the index.
flow_min = flow_min[['Date', 'Value']].set_index('Date')
flow_min.head()
Out[Β ]:
Value
Date
1995-01-01 2.226
1995-02-01 2.456
1995-03-01 4.548
1995-04-01 1.586
1995-05-01 16.041

TRAIN THE MODEL¶

InΒ [Β ]:
# Build the supervised-learning table for the min-flow models from `flow_min`,
# the Date-indexed single-column ('Value') frame prepared above.
series = flow_min
values = series.values
# Add rolling-window features (window of 3) through the notebook's helper.
values = add_rolling_features(values, window=3)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the full series here, before any
# train/test separation that walk_forward_validation presumably performs, so
# test-period statistics may leak into the scaled inputs — confirm or fit on
# the training span only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# Frame the scaled values with 6 lagged steps as inputs.
# NOTE(review): this rebinds `data`, which held the raw Excel sheet loaded at
# the top of the notebook.
data = series_to_supervised(values, n_in=6)
# Re-attach dates, skipping the first 6+2 index entries; presumably 6 rows are
# lost to the lags and 2 to the 3-wide rolling window — TODO confirm against
# the helpers.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate forecasters as (display name, forecast callable) pairs, evaluated in this order.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Reset the shared metric lists so scores from an earlier section do not carry over.
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Validating a second time just for the plot doubles the run time and, for any
# forecaster that is not deterministic, draws a curve that differs from the one
# the reported metrics were computed on.
predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)

    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions.append((model_name, test_index, yhat))

# Collect the scores: one row per model, one column per metric.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics_df = pd.DataFrame(metrics)

# Plot the actual values against each model's predictions.
plt.figure(figsize=(16, 6))
# `test_index` and `y` come from the last validation run above.
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

for model_name, model_index, model_yhat in predictions:
    plt.plot(model_index, model_yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Min Flow (m3/s)', fontsize=18)

plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)

plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)

plt.grid(axis='y', linestyle='--')

# Hide the top and right frame lines.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=1.6
>expected=4.0, predicted=1.6
>expected=2.0, predicted=1.2
>expected=3.5, predicted=1.8
>expected=1.1, predicted=1.3
>expected=-0.7, predicted=-0.3
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=1.0
>expected=4.1, predicted=2.0
>expected=3.1, predicted=1.9
>expected=0.0, predicted=2.1
>expected=4.0, predicted=1.9
>expected=3.8, predicted=2.7
>expected=-0.3, predicted=0.1
>expected=-0.6, predicted=-0.5
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.3
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.7
>expected=2.4, predicted=2.8
>expected=1.2, predicted=1.7
>expected=2.3, predicted=2.8
>expected=1.4, predicted=1.2
>expected=-0.4, predicted=-0.5
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=2.2
>expected=4.0, predicted=4.9
>expected=2.0, predicted=2.5
>expected=3.5, predicted=2.0
>expected=1.1, predicted=0.9
>expected=-0.7, predicted=-0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.4
>expected=0.9, predicted=0.5
>expected=4.1, predicted=4.6
>expected=3.1, predicted=4.2
>expected=0.0, predicted=4.4
>expected=4.0, predicted=1.4
>expected=3.8, predicted=1.3
>expected=-0.3, predicted=-0.5
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.3
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.3
>expected=2.4, predicted=1.9
>expected=1.2, predicted=1.6
>expected=2.3, predicted=2.2
>expected=1.4, predicted=1.1
>expected=-0.4, predicted=-0.5
>expected=-0.7, predicted=-0.6
>expected=3.6, predicted=2.4
>expected=4.0, predicted=1.4
>expected=2.0, predicted=0.8
>expected=3.5, predicted=1.0
>expected=1.1, predicted=0.6
>expected=-0.7, predicted=0.2
>expected=-0.5, predicted=0.2
>expected=-0.6, predicted=-0.2
>expected=-0.7, predicted=-0.4
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.6
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=0.8
>expected=4.1, predicted=2.7
>expected=3.1, predicted=1.4
>expected=0.0, predicted=0.9
>expected=4.0, predicted=0.7
>expected=3.8, predicted=0.7
>expected=-0.3, predicted=0.6
>expected=-0.6, predicted=0.5
>expected=-0.7, predicted=0.4
>expected=-0.7, predicted=0.2
>expected=-0.7, predicted=-0.6
>expected=-0.3, predicted=-0.4
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.3
>expected=2.4, predicted=2.0
>expected=1.2, predicted=1.6
>expected=2.3, predicted=1.9
>expected=1.4, predicted=1.0
>expected=-0.4, predicted=-0.3
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=2.2
>expected=4.0, predicted=3.2
>expected=2.0, predicted=3.2
>expected=3.5, predicted=1.7
>expected=1.1, predicted=1.5
>expected=-0.7, predicted=-0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=0.4
>expected=4.1, predicted=3.1
>expected=3.1, predicted=3.7
>expected=0.0, predicted=3.9
>expected=4.0, predicted=2.2
>expected=3.8, predicted=1.8
>expected=-0.3, predicted=-0.2
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.4
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.2
>expected=2.4, predicted=2.1
>expected=1.2, predicted=1.7
>expected=2.3, predicted=2.4
>expected=1.4, predicted=1.2
>expected=-0.4, predicted=-0.5
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=1.6
>expected=4.0, predicted=1.6
>expected=2.0, predicted=1.2
>expected=3.5, predicted=1.8
>expected=1.1, predicted=1.3
>expected=-0.7, predicted=-0.3
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=1.0
>expected=4.1, predicted=2.0
>expected=3.1, predicted=1.9
>expected=0.0, predicted=2.1
>expected=4.0, predicted=1.9
>expected=3.8, predicted=2.7
>expected=-0.3, predicted=0.1
>expected=-0.6, predicted=-0.5
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.3
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.7
>expected=2.4, predicted=2.8
>expected=1.2, predicted=1.7
>expected=2.3, predicted=2.8
>expected=1.4, predicted=1.2
>expected=-0.4, predicted=-0.5
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=2.2
>expected=4.0, predicted=4.9
>expected=2.0, predicted=2.5
>expected=3.5, predicted=2.0
>expected=1.1, predicted=0.9
>expected=-0.7, predicted=-0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.4
>expected=0.9, predicted=0.5
>expected=4.1, predicted=4.6
>expected=3.1, predicted=4.2
>expected=0.0, predicted=4.4
>expected=4.0, predicted=1.4
>expected=3.8, predicted=1.3
>expected=-0.3, predicted=-0.5
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.3
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.3
>expected=2.4, predicted=1.9
>expected=1.2, predicted=1.6
>expected=2.3, predicted=2.2
>expected=1.4, predicted=1.1
>expected=-0.4, predicted=-0.5
>expected=-0.7, predicted=-0.6
>expected=3.6, predicted=2.4
>expected=4.0, predicted=1.4
>expected=2.0, predicted=0.8
>expected=3.5, predicted=1.0
>expected=1.1, predicted=0.6
>expected=-0.7, predicted=0.2
>expected=-0.5, predicted=0.2
>expected=-0.6, predicted=-0.2
>expected=-0.7, predicted=-0.4
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.6
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=0.8
>expected=4.1, predicted=2.7
>expected=3.1, predicted=1.4
>expected=0.0, predicted=0.9
>expected=4.0, predicted=0.7
>expected=3.8, predicted=0.7
>expected=-0.3, predicted=0.6
>expected=-0.6, predicted=0.5
>expected=-0.7, predicted=0.4
>expected=-0.7, predicted=0.2
>expected=-0.7, predicted=-0.6
>expected=-0.3, predicted=-0.4
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.3
>expected=2.4, predicted=2.0
>expected=1.2, predicted=1.6
>expected=2.3, predicted=1.9
>expected=1.4, predicted=1.0
>expected=-0.4, predicted=-0.3
>expected=-0.7, predicted=-0.7
>expected=3.6, predicted=2.2
>expected=4.0, predicted=3.3
>expected=2.0, predicted=3.1
>expected=3.5, predicted=1.9
>expected=1.1, predicted=1.6
>expected=-0.7, predicted=-0.4
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.5, predicted=-0.5
>expected=0.9, predicted=0.4
>expected=4.1, predicted=3.4
>expected=3.1, predicted=3.9
>expected=0.0, predicted=3.8
>expected=4.0, predicted=2.1
>expected=3.8, predicted=1.9
>expected=-0.3, predicted=-0.4
>expected=-0.6, predicted=-0.6
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.7, predicted=-0.7
>expected=-0.3, predicted=-0.4
>expected=-0.3, predicted=-0.4
>expected=0.4, predicted=0.2
>expected=2.4, predicted=2.2
>expected=1.2, predicted=1.6
>expected=2.3, predicted=2.2
>expected=1.4, predicted=1.2
>expected=-0.4, predicted=-0.5
No description has been provided for this image

EVALUATE THE MODEL¶

InΒ [Β ]:
# Assemble the per-model scores into one table: a row per model, a column per metric.
metrics = pd.DataFrame({
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values,
})
InΒ [Β ]:
metrics
Out[Β ]:
Model MAE RMSE R-squared Nash-Sutcliffe Efficiency Willmott's Index of Agreement
0 LightGBM 0.599431 0.980653 0.687825 0.687825 0.887622
1 XGBoost 0.562869 1.109127 0.600671 0.600671 0.892242
2 SVM 0.852233 1.239082 0.501612 0.501612 0.770965
3 Random Forest 0.550639 1.002738 0.673605 0.673605 0.902759
InΒ [Β ]:
# Round to two decimals for display. This rebinds `metrics`, so the bar-chart
# cell that follows also works with the rounded values.
metrics = metrics.round(2)

# Draw the table on an otherwise empty axes.
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# get_celld() is the public accessor for the {(row, column): cell} mapping;
# row 0 holds the column labels and column -1 would hold row labels.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Min Flow')
plt.show()
No description has been provided for this image
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long display title (the title is currently not drawn).
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}

# Draw a separate bar chart for every metric column.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

10. RUNOFF (m3/s) ANALYSIS¶

InΒ [Β ]:
# Select the runoff rows of the long-format table; parameters[4] names the
# runoff parameter (the preview below shows 'Runoff').
runoff = transformed_data.loc[transformed_data['Parameter'] == parameters[4]]
runoff.head()
Out[Β ]:
Parameter Date Value
4 Runoff 1995-01-01 2.945
9 Runoff 1996-01-01 9.508
14 Runoff 1997-01-01 6.325
19 Runoff 1998-01-01 15.325
24 Runoff 1999-01-01 10.231
InΒ [Β ]:
# Remove the 'Parameter' column, leaving the remaining columns untouched.
runoff = runoff.drop('Parameter', axis=1)
InΒ [Β ]:
runoff.head()
Out[Β ]:
Date Value
4 1995-01-01 2.945
9 1996-01-01 9.508
14 1997-01-01 6.325
19 1998-01-01 15.325
24 1999-01-01 10.231
InΒ [Β ]:
# Order the observations chronologically (ascending 'Date'), then preview the first rows.
runoff = runoff.sort_values('Date', ascending=True)
runoff.head()
Out[Β ]:
Date Value
4 1995-01-01 2.945
139 1995-02-01 2.507
274 1995-03-01 1.389
409 1995-04-01 7.098
544 1995-05-01 6.384
InΒ [Β ]:
runoff.head()
Out[Β ]:
Date Value
4 1995-01-01 2.945
139 1995-02-01 2.507
274 1995-03-01 1.389
409 1995-04-01 7.098
544 1995-05-01 6.384
InΒ [Β ]:
# Line chart of the runoff series against date.
plt.figure(figsize=(20, 8))
runoff.plot.line(x='Date', y='Value', ax=plt.gca(), color='#ff7f0e', fontsize=12)
plt.xlabel('Date', fontsize=20)
# NOTE(review): the raw sheet labels this station row 'Runoff (mm)'; confirm
# that m3/s is the intended unit for this axis.
plt.ylabel('Runoff (m3/s)', fontsize=20)
# These tick sizes take over from the fontsize passed to the plot call above.
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.grid(True, linestyle='--')
plt.tight_layout()
# Single series: build the legend only to drop it from the figure.
plt.legend().remove()
plt.show()
No description has been provided for this image
InΒ [Β ]:
# Add fuller test to check the stationarity of the dataset
def adfuller_test(values):
    """Print an Augmented Dickey-Fuller stationarity report for ``values``.

    Prints the first four entries of the ``adfuller`` result next to their
    labels, then a verdict line chosen by comparing the p-value with 0.05.

    Args:
        values: the series passed straight through to ``adfuller``.

    Returns:
        None. The report goes to standard output.
    """
    result = adfuller(values)
    labels = ['ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    # Pair each label with the result entry at the same position.
    for label, value in zip(labels, result):
        print(' : '.join((label, str(value))))
    p_value = result[1]
    if not p_value <= 0.05:
        print("Weak evidence against the null hypothesis(H0)")
    else:
        print("Strong evidence against the null hypothesis(H0)")
InΒ [Β ]:
# H0: the series has a unit root, i.e. it is non-stationary
# H1: the series is stationary
InΒ [Β ]:
adfuller_test(runoff['Value'])
ADF Test Statistic : -3.1849102477864863
p-value : 0.020874917485704747
#Lags Used : 11
Number of Observations Used : 312
Strong evidence against the null hypothesis(H0)
InΒ [Β ]:
# Keep just the date and value columns and make the date the index.
runoff = runoff[['Date', 'Value']].set_index('Date')
runoff.head()
Out[Β ]:
Value
Date
1995-01-01 2.945
1995-02-01 2.507
1995-03-01 1.389
1995-04-01 7.098
1995-05-01 6.384
InΒ [Β ]:
# Empty result holders for the runoff models: one independent list per field.
# NOTE(review): the training cell below appends to the shared model_names /
# mae_values / ... lists instead — confirm whether these are still needed.
(
    runoff_model_names,
    runoff_mae_values,
    runoff_rmse_values,
    runoff_r2_values,
    runoff_nse_values,
    runoff_willmott_values,
) = ([] for _ in range(6))

TRAIN THE MODEL¶

InΒ [Β ]:
# Build the supervised-learning table for the runoff models from `runoff`,
# the Date-indexed single-column ('Value') frame prepared above.
series = runoff
values = series.values
# Add rolling-window features (window of 3) through the notebook's helper.
values = add_rolling_features(values, window=3)
values = pd.DataFrame(values).values
# NOTE(review): the scaler is fitted on the full series here, before any
# train/test separation that walk_forward_validation presumably performs, so
# test-period statistics may leak into the scaled inputs — confirm or fit on
# the training span only.
scaler = StandardScaler()
values = scaler.fit_transform(values)
# Frame the scaled values with 6 lagged steps as inputs.
# NOTE(review): this rebinds `data`, which held the raw Excel sheet loaded at
# the top of the notebook.
data = series_to_supervised(values, n_in=6)
# Re-attach dates, skipping the first 6+2 index entries; presumably 6 rows are
# lost to the lags and 2 to the 3-wide rolling window — TODO confirm against
# the helpers.
data_df = pd.DataFrame(data, index=series.index[6+2:])
InΒ [Β ]:
# Candidate forecasters as (display name, forecast callable) pairs, evaluated in this order.
models = [
    ('LightGBM', lightgbm_forecast),
    ('XGBoost', xgboost_forecast),
    ('SVM', svm_forecast),
    ('Random Forest', random_forest_forecast)
]

# Reset the shared metric lists so scores from an earlier section do not carry over.
model_names.clear()
mae_values.clear()
rmse_values.clear()
r2_values.clear()
nse_values.clear()
willmott_values.clear()

# Run walk-forward validation exactly once per model and keep its predictions.
# Validating a second time just for the plot doubles the run time and, for any
# forecaster that is not deterministic, draws a curve that differs from the one
# the reported metrics were computed on.
predictions = []
for model_name, model_func in models:
    mae, rmse, r2, nse, willmott, test_index, y, yhat = walk_forward_validation(data_df, model_func)

    model_names.append(model_name)
    mae_values.append(mae)
    rmse_values.append(rmse)
    r2_values.append(r2)
    nse_values.append(nse)
    willmott_values.append(willmott)
    predictions.append((model_name, test_index, yhat))

# Collect the scores: one row per model, one column per metric.
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics_df = pd.DataFrame(metrics)

# Plot the actual values against each model's predictions.
plt.figure(figsize=(16, 6))
# `test_index` and `y` come from the last validation run above.
plt.plot(test_index, y, label='Actual', linestyle='--', color='black', linewidth=2)

for model_name, model_index, model_yhat in predictions:
    plt.plot(model_index, model_yhat, label=f'{model_name} Predicted', linestyle='--', color=colors_dict[model_name], linewidth=2)

plt.xlabel('Year and Month', fontsize=18)
plt.ylabel('Scaled Runoff (m3/s)', fontsize=18)

plt.legend(fontsize=14, loc='upper right', bbox_to_anchor=(0.6, 1.15), ncol=1)

plt.xticks(rotation=45, fontsize=14)
plt.yticks(fontsize=14)

plt.grid(axis='y', linestyle='--')

# Hide the top and right frame lines.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()
>expected=0.3, predicted=0.2
>expected=3.0, predicted=1.4
>expected=2.7, predicted=1.2
>expected=0.9, predicted=1.6
>expected=2.2, predicted=1.0
>expected=1.2, predicted=1.9
>expected=0.1, predicted=0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.7
>expected=3.5, predicted=1.8
>expected=2.6, predicted=1.6
>expected=-0.2, predicted=2.1
>expected=1.9, predicted=1.3
>expected=3.2, predicted=1.8
>expected=1.3, predicted=1.6
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=-0.2
>expected=-0.2, predicted=0.1
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.7
>expected=0.6, predicted=0.8
>expected=1.0, predicted=1.5
>expected=1.1, predicted=1.6
>expected=0.6, predicted=0.5
>expected=0.3, predicted=0.1
>expected=3.0, predicted=1.7
>expected=2.7, predicted=3.5
>expected=0.9, predicted=1.6
>expected=2.2, predicted=1.2
>expected=1.2, predicted=1.8
>expected=0.1, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.6
>expected=3.5, predicted=2.6
>expected=2.6, predicted=3.4
>expected=-0.2, predicted=2.7
>expected=1.9, predicted=1.7
>expected=3.2, predicted=1.6
>expected=1.3, predicted=1.1
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=0.0
>expected=-0.2, predicted=0.1
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.9
>expected=0.6, predicted=1.3
>expected=1.0, predicted=1.2
>expected=1.1, predicted=1.3
>expected=0.6, predicted=0.5
>expected=0.3, predicted=0.1
>expected=3.0, predicted=2.5
>expected=2.7, predicted=1.5
>expected=0.9, predicted=0.7
>expected=2.2, predicted=1.1
>expected=1.2, predicted=0.8
>expected=0.1, predicted=0.2
>expected=-0.5, predicted=-0.3
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.3
>expected=0.8, predicted=0.8
>expected=3.5, predicted=2.6
>expected=2.6, predicted=1.6
>expected=-0.2, predicted=1.0
>expected=1.9, predicted=1.0
>expected=3.2, predicted=0.9
>expected=1.3, predicted=0.6
>expected=-0.8, predicted=0.4
>expected=-0.9, predicted=0.1
>expected=-1.0, predicted=-0.1
>expected=-0.9, predicted=-0.8
>expected=-0.0, predicted=-0.3
>expected=-0.2, predicted=-0.3
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.4
>expected=0.6, predicted=0.8
>expected=1.0, predicted=1.3
>expected=1.1, predicted=1.2
>expected=0.6, predicted=0.3
>expected=0.3, predicted=0.0
>expected=3.0, predicted=1.9
>expected=2.7, predicted=2.7
>expected=0.9, predicted=2.3
>expected=2.2, predicted=1.6
>expected=1.2, predicted=1.6
>expected=0.1, predicted=0.4
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.5
>expected=3.5, predicted=2.3
>expected=2.6, predicted=2.7
>expected=-0.2, predicted=2.7
>expected=1.9, predicted=1.6
>expected=3.2, predicted=1.9
>expected=1.3, predicted=1.6
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=-0.1
>expected=-0.2, predicted=0.1
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.5
>expected=0.6, predicted=0.7
>expected=1.0, predicted=1.4
>expected=1.1, predicted=1.5
>expected=0.6, predicted=0.5
>expected=0.3, predicted=0.2
>expected=3.0, predicted=1.4
>expected=2.7, predicted=1.2
>expected=0.9, predicted=1.6
>expected=2.2, predicted=1.0
>expected=1.2, predicted=1.9
>expected=0.1, predicted=0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.7
>expected=3.5, predicted=1.8
>expected=2.6, predicted=1.6
>expected=-0.2, predicted=2.1
>expected=1.9, predicted=1.3
>expected=3.2, predicted=1.8
>expected=1.3, predicted=1.6
>expected=-0.8, predicted=-0.7
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=-0.2
>expected=-0.2, predicted=0.1
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.7
>expected=0.6, predicted=0.8
>expected=1.0, predicted=1.5
>expected=1.1, predicted=1.6
>expected=0.6, predicted=0.5
>expected=0.3, predicted=0.1
>expected=3.0, predicted=1.7
>expected=2.7, predicted=3.5
>expected=0.9, predicted=1.6
>expected=2.2, predicted=1.2
>expected=1.2, predicted=1.8
>expected=0.1, predicted=0.6
>expected=-0.5, predicted=-0.5
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.6
>expected=3.5, predicted=2.6
>expected=2.6, predicted=3.4
>expected=-0.2, predicted=2.7
>expected=1.9, predicted=1.7
>expected=3.2, predicted=1.6
>expected=1.3, predicted=1.1
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.8
>expected=-1.0, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=0.0
>expected=-0.2, predicted=0.1
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.9
>expected=0.6, predicted=1.3
>expected=1.0, predicted=1.2
>expected=1.1, predicted=1.3
>expected=0.6, predicted=0.5
>expected=0.3, predicted=0.1
>expected=3.0, predicted=2.5
>expected=2.7, predicted=1.5
>expected=0.9, predicted=0.7
>expected=2.2, predicted=1.1
>expected=1.2, predicted=0.8
>expected=0.1, predicted=0.2
>expected=-0.5, predicted=-0.3
>expected=-0.6, predicted=-0.7
>expected=-0.9, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.3
>expected=0.8, predicted=0.8
>expected=3.5, predicted=2.6
>expected=2.6, predicted=1.6
>expected=-0.2, predicted=1.0
>expected=1.9, predicted=1.0
>expected=3.2, predicted=0.9
>expected=1.3, predicted=0.6
>expected=-0.8, predicted=0.4
>expected=-0.9, predicted=0.1
>expected=-1.0, predicted=-0.1
>expected=-0.9, predicted=-0.8
>expected=-0.0, predicted=-0.3
>expected=-0.2, predicted=-0.3
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.4
>expected=0.6, predicted=0.8
>expected=1.0, predicted=1.3
>expected=1.1, predicted=1.2
>expected=0.6, predicted=0.3
>expected=0.3, predicted=0.1
>expected=3.0, predicted=2.0
>expected=2.7, predicted=2.4
>expected=0.9, predicted=2.4
>expected=2.2, predicted=1.6
>expected=1.2, predicted=1.5
>expected=0.1, predicted=0.5
>expected=-0.5, predicted=-0.6
>expected=-0.6, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.9
>expected=-0.9, predicted=-0.8
>expected=-0.3, predicted=-0.2
>expected=0.8, predicted=0.6
>expected=3.5, predicted=2.4
>expected=2.6, predicted=2.4
>expected=-0.2, predicted=2.7
>expected=1.9, predicted=1.4
>expected=3.2, predicted=2.0
>expected=1.3, predicted=1.8
>expected=-0.8, predicted=-0.8
>expected=-0.9, predicted=-0.9
>expected=-1.0, predicted=-1.0
>expected=-0.9, predicted=-0.9
>expected=-0.0, predicted=-0.2
>expected=-0.2, predicted=0.2
>expected=0.4, predicted=0.2
>expected=1.9, predicted=1.5
>expected=0.6, predicted=0.6
>expected=1.0, predicted=1.3
>expected=1.1, predicted=1.4
>expected=0.6, predicted=0.4
No description has been provided for this image

EVALUATE THE MODEL¶

InΒ [Β ]:
# Create a DataFrame
metrics = {
    'Model': model_names,
    'MAE': mae_values,
    'RMSE': rmse_values,
    'R-squared': r2_values,
    'Nash-Sutcliffe Efficiency': nse_values,
    'Willmott\'s Index of Agreement': willmott_values
}

metrics = pd.DataFrame(metrics)
InΒ [Β ]:
metrics
Out[Β ]:
Model MAE RMSE R-squared Nash-Sutcliffe Efficiency Willmott's Index of Agreement
0 LightGBM 0.516156 0.789551 0.651963 0.651963 0.882968
1 XGBoost 0.445748 0.741400 0.693119 0.693119 0.916503
2 SVM 0.509029 0.725021 0.706528 0.706528 0.889898
3 Random Forest 0.400728 0.703393 0.723776 0.723776 0.922186
InΒ [Β ]:
# Round to two decimals for display. This rebinds `metrics`, so the bar-chart
# cell that follows also works with the rounded values.
metrics = metrics.round(2)

# Draw the table on an otherwise empty axes.
fig, ax = plt.subplots(figsize=(20, 4))
ax.axis('off')

table = ax.table(cellText=metrics.values,
                 colLabels=metrics.columns,
                 cellLoc='center',
                 loc='center',
                 bbox=[0, 0, 1, 1])

table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1.2, 1.2)

header_color = '#D0D0D0'
data_color = '#FFFFFF'

# get_celld() is the public accessor for the {(row, column): cell} mapping;
# row 0 holds the column labels and column -1 would hold row labels.
for (row, col), cell in table.get_celld().items():
    cell.set_edgecolor('black')
    if row == 0 or col == -1:
        cell.set_text_props(weight='bold')
        cell.set_facecolor(header_color)
    else:
        cell.set_facecolor(data_color)

plt.title('Models Performance on Runoff (m3/s)')
plt.show()
No description has been provided for this image
InΒ [Β ]:
# One bar colour per model, in the row order of `metrics`.
colors = ['#27ad81', '#5dc863', '#aadc32', '#fde725']
bar_width = 0.4

# Metric column -> long display title (the title is currently not drawn).
metrics_title = {
    'MAE': 'Mean Absolute Error (MAE)',
    'RMSE': 'Root Mean Squared Error (RMSE)',
    'R-squared': 'R-squared',
    'Nash-Sutcliffe Efficiency': 'Nash-Sutcliffe Efficiency',
    'Willmott\'s Index of Agreement': 'Willmott\'s Index of Agreement'
}

# Draw a separate bar chart for every metric column.
for metric, title in metrics_title.items():
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(metrics['Model'], height=metrics[metric], width=bar_width, color=colors)
    ax.set_ylabel(metric, fontsize=16)
    ax.tick_params(axis='x', labelsize=16, rotation=30)
    ax.tick_params(axis='y', labelsize=16)
    fig.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

The End¶